# Goal: Generate ideological indices (additive, PCA, IRT) for Sample 1
# input: data/orig_sample1.dta
# output: data/sample1.RData

# by Jennifer Pan and Yiqing Xu

# Takes ~90 minutes to run on a M1 Mac Ultra (20 cores; 128G RAM)

## 1. multiple imputation
## 2. simple index without weighting
## 3. PCA in each dimension
## 4. Repeat 1-3 for wave 2
## 5. create IRT measure for wave 1


# load data
d <- as.data.frame(haven::read_dta("data/orig_sample1.dta"))
head(d)
names(d)

# wave 1 policy questions
question.list <- list(paste0("s1_",1:14), # poli
                      paste0("s2_",1:14), # econ
                      paste0("s3_",1:14), # nati
                      paste0("s4_",1:7), # trad
                      paste0("s5_",1:7), # equi
                      paste0("s6_",1:7)) # ethn

# wave 2 policy questions
question.list2 <- list(paste0("t1_",1:14), # poli
                      paste0("t2_",1:14), # econ 
                      paste0("t3_",1:14), # nati
                      paste0("t4_",1:7), # trad
                      paste0("t5_",1:7), # equi
                      paste0("t6_",1:7)) # ethn

sign.list <- list(
  c(1, -1, -1, 1, -1, 1, -1, 1, 1, 1, -1, -1, 1, -1), # nationalism * (-1)
  c(-1, -1, 1, 1, -1, -1, 1, 1, -1, 1, -1, -1, 1, 1), # political
  c(-1, 1, -1, -1, 1, 1, 1, -1, 1, -1, 1, -1, 1, -1), # market
  c(1, -1, -1, -1, 1, 1, 1), # traditionalism * (-1)
  c(-1, 1, -1, 1, 1, 1, -1), # social equality
  c(1, -1, 1, -1, -1, 1, -1)  # minority accommodation
)


#######################################################
## Multiple Imputation
#######################################################

library(lavaan)
library(Amelia)
dim(d)

#############################
## Additive & PCA: Wave 1
#############################

## multiple imputation
vars <- unlist(question.list)
vars2 <- c(paste0("support_", 1:4), paste0("sdo_", 1:5), paste0("auth_", 1:5))
d1 <- d[, c("id", vars, vars2)]
set.seed(02139)
nsims <- 100
mi.out <- amelia(d1, m = nsims, idvars = "id", ords = vars, 
                 parallel = "snow", ncpus = 10)


d0 <- mi.out$imputations$imp1 # take the first copy
d0$id <- NULL
dim(d0)

## regime support, SDO, AUTH, and Additive indices, PCA measures
support <- matrix(NA, nrow(d), nsims)
sdo <- matrix(NA, nrow(d), nsims)
auth <- matrix(NA, nrow(d), nsims)
simple.index <- array(NA, dim = c(nrow(d), 6, nsims))
thetas.pca <- array(NA, dim = c(nrow(d), 7, nsims))
alpha0.pca <- matrix(NA, 63, nsims)
alphas.pca <- matrix(NA, 63, nsims)
for (i in 1:nsims) {
  d0 <- mi.out$imputations[[i]]
  d0$id <- NULL
  ## support for government
  support[,i] <- (d0$support_1 + d0$support_2 + d0$support_3 + d0$support_4)/4
  ## SDO and AUTH
  sdo[,i] <- d0$sdo_1 + d0$sdo_2 - d0$sdo_3 - d0$sdo_4 + d0$sdo_5
  auth[,i] <- d0$auth_1 + d0$auth_2 - d0$auth_3 - d0$auth_4 + d0$auth_5
  ## simple indicies
  simple.index[,1,i] <- (d0$s1_1 - d0$s1_2 - d0$s1_3 + d0$s1_4 - d0$s1_5 + d0$s1_6 - d0$s1_7 + d0$s1_8 + d0$s1_9 + d0$s1_10 - d0$s1_11 - d0$s1_12 + d0$s1_13 - d0$s1_14)/14
  simple.index[,2,i] <- (- d0$s2_1 - d0$s2_2 + d0$s2_3 + d0$s2_4 - d0$s2_5 - d0$s2_6 + d0$s2_7 + d0$s2_8 - d0$s2_9 + d0$s2_10 - d0$s2_11 - d0$s2_12 + d0$s2_13 + d0$s2_14)/14
  simple.index[,3,i] <- (- d0$s3_1 + d0$s3_2 - d0$s3_3 - d0$s3_4 + d0$s3_5 + d0$s3_6 + d0$s3_7 - d0$s3_8 + d0$s3_9 - d0$s3_10 + d0$s3_11 - d0$s3_12 + d0$s3_13 - d0$s3_14)/14
  simple.index[,4,i] <- (d0$s4_1 - d0$s4_2 - d0$s4_3 - d0$s4_4 + d0$s4_5 + d0$s4_6 + d0$s4_7)/7
  simple.index[,5,i] <- (-d0$s5_1 + d0$s5_2 - d0$s5_3 + d0$s5_4 + d0$s5_5 + d0$s5_6 - d0$s5_7)/7
  simple.index[,6,i] <- (d0$s6_1 - d0$s6_2 + d0$s6_3 - d0$s6_4 - d0$s6_5 + d0$s6_6 - d0$s6_7)/7
  ## ideological measures based on PCA
  pca.out<-prcomp(d0[,1:63], center = TRUE, scale = TRUE)
  pca.out1<-prcomp(d0[,1:14], center = TRUE, scale = TRUE)
  pca.out2<-prcomp(d0[,15:28], center = TRUE, scale = TRUE)
  pca.out3<-prcomp(d0[,29:42], center = TRUE, scale = TRUE)
  pca.out4<-prcomp(d0[,43:49], center = TRUE, scale = TRUE)
  pca.out5<-prcomp(d0[,50:56], center = TRUE, scale = TRUE)
  pca.out6<-prcomp(d0[,57:63], center = TRUE, scale = TRUE)
  alpha0.pca[,i] <- pca.out$rotation[,1]
  alphas.pca[,i] <- c(
    pca.out1$rotation[,1]*(-1), pca.out2$rotation[,1]*(-1), 
    pca.out3$rotation[,2], pca.out4$rotation[,1]*(-1), 
    pca.out5$rotation[,2], pca.out6$rotation[,1]*(-1))
  thetas.pca[,1,i] <- pca.out$x[,1]
  thetas.pca[,2,i] <- pca.out1$x[,1]*(-1)
  thetas.pca[,3,i] <- pca.out2$x[,1]*(-1)
  thetas.pca[,4,i] <- pca.out3$x[,2]
  thetas.pca[,5,i] <- pca.out4$x[,1]*(-1)
  thetas.pca[,6,i] <- pca.out5$x[,2]
  thetas.pca[,7,i] <- pca.out6$x[,1]*(-1)
  cat(i)
}

## indices
idx_support <- apply(support, 1, mean)
idx_sdo <- apply(sdo, 1, mean)
idx_auth <- apply(auth, 1, mean)
## simple indices
index <- apply(simple.index, c(1,2), mean)
colnames(index) <- c("index_nati", "index_poli", "index_econ", "index_trad", "index_equi", "index_ethn")
head(index)
## 63 questions all together
alpha0.pca <- apply(alpha0.pca, 1, mean)
## 6 dimensions separately
alphas.pca <- apply(alphas.pca, 1, mean)
## ideology by pca
ideology_pca <- apply(thetas.pca, c(1,2), mean)
colnames(ideology_pca) <- c("pca_all", "pca_nati", "pca_poli", "pca_econ", "pca_trad", "pca_equi", "pca_ethn")
head(ideology_pca)

d <- cbind.data.frame(d, idx_support, idx_sdo, idx_auth, index, ideology_pca)
names(d)

d$index_nati <- d$index_nati * (-1)
d$index_trad <- d$index_trad * (-1)
d$pca_nati <- d$pca_nati * (-1)
d$pca_trad <- d$pca_trad * (-1)
d$pca_equi <- d$pca_equi * (-1)

# plot(d$index_nati, d$pca_nati)
# plot(d$index_poli, d$pca_poli)
# plot(d$index_econ, d$pca_econ)
# plot(d$index_trad, d$pca_trad)
# plot(d$index_equi, d$pca_equi)
# plot(d$index_ethn, d$pca_ethn)

cor(d$index_nati, d$pca_nati) # 0.93
cor(d$index_poli, d$pca_poli) # 0.96
cor(d$index_econ, d$pca_econ) # 0.88
cor(d$index_trad, d$pca_trad) # 0.92
cor(d$index_equi, d$pca_equi) # 0.88
cor(d$index_ethn, d$pca_ethn) # 0.96


#############################
## Additive & PCA: Wave 2
#############################

## multiple imputation
vars <- unlist(question.list2)
d2 <- d[which(d$wave2 == 1), c("id", vars)]
dim(d2)
set.seed(02139)
nsims <- 100
mi.out <- amelia(d2, m = nsims, idvars = "id", ords = vars, 
                 parallel = "snow", ncpus = 8)

#############################
## PCA
#############################

## Additive indcies & PCA measures
simple.index <- array(NA, dim = c(nrow(d2), 6, nsims))
thetas.pca <- array(NA, dim = c(nrow(d2), 7, nsims))
alpha0.pca <- matrix(NA, 63, nsims) ## giant PCA
alphas.pca <- matrix(NA, 63, nsims)
for (i in 1:nsims) {
  d0 <- mi.out$imputations[[i]]
  d0$id <- NULL
  ## simple indices
  simple.index[,1,i] <- (d0$t1_1 - d0$t1_2 - d0$t1_3 + d0$t1_4 - d0$t1_5 + d0$t1_6 - d0$t1_7 + d0$t1_8 + d0$t1_9 + d0$t1_10 - d0$t1_11 - d0$t1_12 + d0$t1_13 - d0$t1_14)/14
  simple.index[,2,i] <- (- d0$t2_1 - d0$t2_2 + d0$t2_3 + d0$t2_4 - d0$t2_5 - d0$t2_6 + d0$t2_7 + d0$t2_8 - d0$t2_9 + d0$t2_10 - d0$t2_11 - d0$t2_12 + d0$t2_13 + d0$t2_14)/14
  simple.index[,3,i] <- (- d0$t3_1 + d0$t3_2 - d0$t3_3 - d0$t3_4 + d0$t3_5 + d0$t3_6 + d0$t3_7 - d0$t3_8 + d0$t3_9 - d0$t3_10 + d0$t3_11 - d0$t3_12 + d0$t3_13 - d0$t3_14)/14
  simple.index[,4,i] <- (d0$t4_1 - d0$t4_2 - d0$t4_3 - d0$t4_4 + d0$t4_5 + d0$t4_6 + d0$t4_7)/7
  simple.index[,5,i] <- (-d0$t5_1 + d0$t5_2 - d0$t5_3 + d0$t5_4 + d0$t5_5 + d0$t5_6 - d0$t5_7)/7
  simple.index[,6,i] <- (d0$t6_1 - d0$t6_2 + d0$t6_3 - d0$t6_4 - d0$t6_5 + d0$t6_6 - d0$t6_7)/7
  ## ideological measures based on PCA
  pca.out<-prcomp(d0[,1:63], center = TRUE, scale = TRUE)
  pca.out1<-prcomp(d0[,1:14], center = TRUE, scale = TRUE)
  pca.out2<-prcomp(d0[,15:28], center = TRUE, scale = TRUE)
  pca.out3<-prcomp(d0[,29:42], center = TRUE, scale = TRUE)
  pca.out4<-prcomp(d0[,43:49], center = TRUE, scale = TRUE)
  pca.out5<-prcomp(d0[,50:56], center = TRUE, scale = TRUE)
  pca.out6<-prcomp(d0[,57:63], center = TRUE, scale = TRUE)
  alpha0.pca[,i] <- pca.out$rotation[,1]
  alphas.pca[,i] <- c(
    pca.out1$rotation[,1]*(-1), pca.out2$rotation[,1], 
    pca.out3$rotation[,1], pca.out4$rotation[,1], 
    pca.out5$rotation[,2]*(-1), pca.out6$rotation[,1])
  thetas.pca[,1,i] <- pca.out$x[,1]
  thetas.pca[,2,i] <- pca.out1$x[,1]*(-1)
  thetas.pca[,3,i] <- pca.out2$x[,1]
  thetas.pca[,4,i] <- pca.out3$x[,1]
  thetas.pca[,5,i] <- pca.out4$x[,1]
  thetas.pca[,6,i] <- pca.out5$x[,2]*(-1)
  thetas.pca[,7,i] <- pca.out6$x[,1]
  cat(i)
}

## simple indices
index <- apply(simple.index, c(1,2), mean)
colnames(index) <- c("index2_nati", "index2_poli", "index2_econ", 
                     "index2_trad", "index2_equi", "index2_ethn")
head(index)
## 63 questions all together
alpha0.pca <- apply(alpha0.pca, 1, mean)
## 6 dimensions separately
alphas.pca <- apply(alphas.pca, 1, mean)
## ideology by pca
ideology_pca <- apply(thetas.pca, c(1,2), mean)
colnames(ideology_pca) <- c("pca2_all", "pca2_nati", "pca2_poli", 
                            "pca2_econ", "pca2_trad", "pca2_equi", "pca2_ethn")
head(ideology_pca)

d2 <- cbind.data.frame(d2, index, ideology_pca)
names(d2)

# scale
vars <-  c("index2_nati","index2_poli","index2_econ","index2_trad","index2_equi","index2_ethn",
           "pca2_all","pca2_nati","pca2_poli","pca2_econ","pca2_trad","pca2_equi","pca2_ethn")  
for (i in 1:length(vars)) {
  d2[,vars[i]] <- scale(d2[,vars[i]])
}

d2$index2_nati <- d2$index2_nati * (-1)
d2$index2_trad <- d2$index2_trad * (-1)
d2$pca2_nati <- d2$pca2_nati * (-1)
d2$pca2_trad <- d2$pca2_trad * (-1)

# plot(d2$index2_nati, d2$pca2_nati)
# plot(d2$index2_poli, d2$pca2_poli)
# plot(d2$index2_econ, d2$pca2_econ)
# plot(d2$index2_trad, d2$pca2_trad)
# plot(d2$index2_equi, d2$pca2_equi)
# plot(d2$index2_ethn, d2$pca2_ethn)

cor(d2$index2_nati, d2$pca2_nati)
cor(d2$index2_poli, d2$pca2_poli) 
cor(d2$index2_econ, d2$pca2_econ) 
cor(d2$index2_trad, d2$pca2_trad) 
cor(d2$index2_equi, d2$pca2_equi) 
cor(d2$index2_ethn, d2$pca2_ethn) 


## Merge into d
vars_to_keep <- c("id","index2_nati", "index2_poli", "index2_econ", "index2_trad", "index2_equi",
                  "index2_ethn", "pca2_all", "pca2_nati", "pca2_poli", "pca2_econ",
                  "pca2_trad", "pca2_equi", "pca2_ethn")

d <- merge(d, d2[vars_to_keep], by = "id", all.x = TRUE)
dim(d)
names(d)


###############################
# IRT Measure (Wave 1 only)
###############################

library("R2jags")


## storage
alphas <- matrix(NA, 14, 6)
thetas <- matrix(NA, nrow(d), 6)
colnames(thetas) <- paste0("irt_",c("nati","poli","econ","trad","equi","ethn"))
begin.time<-Sys.time()
for (i in 1:6) {
  cat("\n",i,": ")
  questions <- question.list[[i]]
  sign <- sign.list[[i]]
  p <- length(questions) # number of questions
  Y <- as.matrix(d[, questions])
  for (j in 1:p) {
    if (sign[j] == -1) {
      Y[,j] <- 6 - Y[,j] # reverse the answer
    }
  }
  n <- nrow(Y)
  m.alpha <- 1.0 ## we allow alpha to be negative
  s.alpha <- 1.0
  m.kappa <- 0.0
  s.kappa <- 1.0
  K <- rep(5,14)
  data <- list("Y", "n", "p", "K",
               "m.alpha", "s.alpha",
               "m.kappa", "s.kappa")
  monitor <- c("alpha", "theta", "kappa")
  jags.file <- file.path(getwd(), "code/support/grmjags.bug")
  inits <- function(){
    kappa.star <- t(sapply(1:p, function(j) c(rnorm(K[j] - 1), rep(NA, max(K) - K[j]))))
    list(alpha=abs(rnorm(p)),
         theta=rnorm(n),
         kappa.star=kappa.star)
  }
  ## JAGS needs extra "data" to specify undefined kappa values as 0.0
  kappa <- t(sapply(1:p, function(j) c(rep(NA, K[j]-1), rep(0.0, max(K) - K[j]))))
  data <- c(data, "kappa")
  ## JAGS doesn't seem to have a problem with n.burnin=1,000    
  jagsout <- jags.parallel(data=data, inits=inits, parameters.to.save=monitor,
                           model.file=jags.file, n.chains=4, n.cluster=4,
                           n.iter=3500, n.thin=1, n.burnin=1000)
  out <- jagsout$BUGSoutput
  ## save difficulty
  alphas[1:p,i] <- out$median$alpha
  cat(round(out$median$alpha,2))
  ## save ability
  thetas[,i]<-out$median$theta     
  ## time spent
  cat("\n")
  print(Sys.time()-begin.time)
  cat("\n")
}

## merge
alphas.irt <- c(alphas[,1], alphas[,2], alphas[,3], alphas[1:7,4], alphas[1:7,5], alphas[1:7,6])
d <- cbind.data.frame(d, thetas)
names(d)

vars <-  c("idx_support","idx_sdo","idx_auth",
           "index_nati","index_poli","index_econ","index_trad","index_equi","index_ethn",
           "pca_all","pca_nati","pca_poli","pca_econ","pca_trad","pca_equi","pca_ethn",
           "irt_nati","irt_poli","irt_econ" ,"irt_trad" ,"irt_equi","irt_ethn")  
for (i in 1:length(vars)) {
  d[,vars[i]] <- scale(d[,vars[i]])
}

d$irt_nati <- d$irt_nati * (-1)
d$irt_trad <- d$irt_trad * (-1)

cor(d$index_nati, d$irt_nati) 
cor(d$index_poli, d$irt_poli) 
cor(d$index_econ, d$irt_econ) 
cor(d$index_trad, d$irt_trad) 
cor(d$index_equi, d$irt_equi) 
cor(d$index_ethn, d$irt_ethn) 

haven::write_dta(d, "data/sample1.dta", version = 12)


